library(tidyverse)
── Attaching core tidyverse packages ────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Five-colour palette passed to scale_fill_manual() in the rating charts
airbnb_palette <- c(
  "#FF5A5F",
  "#00A699",
  "#767676",
  "#484848",
  "#FFB400"
)
# Load the raw listings file; readr guesses the column types
main_uncleaned_data <- readr::read_csv(file = "airbnb1.csv")
Rows: 1010 Columns: 25── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): property_type, room_type, cancellation_policy
dbl (22): accommodates, bathrooms, bedrooms, beds, amenities_count, minimum_nights, maximum_nights, number_of_reviews, review_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the raw data
main_uncleaned_data %>% head()

# Per-column summary statistics (also reports the NA count of numeric columns)
main_uncleaned_data %>% summary()
 property_type       room_type          accommodates     bathrooms        bedrooms           beds       amenities_count
 Length:1010        Length:1010        Min.   :1.000   Min.   :1.000   Min.   : 1.000   Min.   :1.000   Min.   : 5.0   
 Class :character   Class :character   1st Qu.:3.000   1st Qu.:2.000   1st Qu.: 2.000   1st Qu.:2.000   1st Qu.:16.0   
 Mode  :character   Mode  :character   Median :5.000   Median :3.000   Median : 3.000   Median :3.000   Median :28.0   
                                       Mean   :4.979   Mean   :2.554   Mean   : 3.235   Mean   :3.045   Mean   :27.7   
                                       3rd Qu.:7.000   3rd Qu.:4.000   3rd Qu.: 4.000   3rd Qu.:4.000   3rd Qu.:39.0   
                                       Max.   :9.000   Max.   :4.000   Max.   :92.000   Max.   :6.000   Max.   :49.0   
                                       NA's   :5       NA's   :5       NA's   :5        NA's   :5       NA's   :5      
 minimum_nights  maximum_nights   number_of_reviews review_scores_rating reviews_per_month availability_30 availability_60
 Min.   :1.000   Min.   :  30.0   Min.   : 1.000    Min.   :1.000        Min.   :0.000     Min.   : 0.00   Min.   : 0.00  
 1st Qu.:3.000   1st Qu.: 308.0   1st Qu.: 8.000    1st Qu.:2.000        1st Qu.:1.300     1st Qu.: 7.00   1st Qu.:16.00  
 Median :5.000   Median : 599.0   Median :10.000    Median :3.000        Median :2.600     Median :16.00   Median :30.00  
 Mean   :5.026   Mean   : 589.1   Mean   : 9.946    Mean   :2.966        Mean   :2.534     Mean   :15.25   Mean   :30.54  
 3rd Qu.:7.000   3rd Qu.: 870.0   3rd Qu.:12.000    3rd Qu.:4.000        3rd Qu.:3.800     3rd Qu.:24.00   3rd Qu.:46.00  
 Max.   :9.000   Max.   :1124.0   Max.   :25.000    Max.   :5.000        Max.   :5.000     Max.   :30.00   Max.   :60.00  
 NA's   :5       NA's   :5        NA's   :5         NA's   :5            NA's   :5         NA's   :5       NA's   :5      
 availability_90 availability_365 instant_bookable host_identity_verified   host_since   cancellation_policy    latitude    
 Min.   : 0.00   Min.   :  0.0    Min.   :0.0000   Min.   :0.000          Min.   :  31   Length:1010         Min.   :40.50  
 1st Qu.:23.00   1st Qu.: 99.0    1st Qu.:0.0000   1st Qu.:1.000          1st Qu.: 842   Class :character    1st Qu.:40.60  
 Median :44.00   Median :183.0    Median :0.0000   Median :1.000          Median :1771   Mode  :character    Median :40.70  
 Mean   :44.38   Mean   :184.5    Mean   :0.4836   Mean   :0.798          Mean   :1791                       Mean   :40.70  
 3rd Qu.:67.00   3rd Qu.:271.0    3rd Qu.:1.0000   3rd Qu.:1.000          3rd Qu.:2773                       3rd Qu.:40.81  
 Max.   :90.00   Max.   :365.0    Max.   :1.0000   Max.   :1.000          Max.   :3649                       Max.   :40.90  
 NA's   :5       NA's   :5        NA's   :5        NA's   :5              NA's   :5                          NA's   :5      
   longitude      guests_included  extra_people       price       
 Min.   :-74.00   Min.   :1.00    Min.   : 0.00   Min.   :-304.0  
 1st Qu.:-73.94   1st Qu.:1.00    1st Qu.:13.00   1st Qu.: 271.0  
 Median :-73.86   Median :2.00    Median :25.00   Median : 318.0  
 Mean   :-73.86   Mean   :2.48    Mean   :25.14   Mean   : 316.6  
 3rd Qu.:-73.78   3rd Qu.:4.00    3rd Qu.:38.00   3rd Qu.: 363.0  
 Max.   :-73.70   Max.   :4.00    Max.   :49.00   Max.   : 566.0  
 NA's   :5        NA's   :5       NA's   :6       NA's   :5       
# Compact overview: one line per column with its type and first values
main_uncleaned_data %>% glimpse()
Rows: 1,010
Columns: 25
$ property_type          <chr> "Apartment", "Townhouse", "Apartment", "Apartment", "House", "Loft", "House", "House", "Apartment…
$ room_type              <chr> "Entire home/apt", "Entire home/apt", "Entire home/apt", "Entire home/apt", "Entire home/apt", "P…
$ accommodates           <dbl> 6, 2, 2, 4, 8, 5, 3, 2, 5, 3, 5, 3, 9, 4, 4, 4, 1, 1, 2, 4, 8, 8, 2, 5, 3, 3, 4, 4, 3, 3, 2, 5, 2…
$ bathrooms              <dbl> 1, 4, 1, 4, 1, 3, 1, 3, 4, 2, 4, 3, 1, 4, 4, 2, 2, 2, 1, 1, 3, 3, 4, 1, 1, 4, 4, 1, 1, 1, 2, 1, 2…
$ bedrooms               <dbl> 3, 1, 4, 2, 2, 2, 1, 5, 2, 4, 3, NA, 3, 1, 5, 4, 4, 5, 5, 4, 5, 3, 3, 5, 4, 1, 2, 2, 2, 2, 2, 3, …
$ beds                   <dbl> 2, 1, 4, 1, 1, 1, 1, 5, 3, 5, 4, 2, 3, 1, 6, 5, 5, 5, 6, 4, 4, 3, 2, 6, 5, 1, 1, 2, 2, 2, 2, 4, 2…
$ amenities_count        <dbl> 41, 31, 8, 49, 23, 40, 20, 14, 25, 49, 48, 9, 39, 37, 35, 19, 27, 11, 16, 38, 28, 48, 8, 30, 22, …
$ minimum_nights         <dbl> 2, 2, 1, 9, 5, 3, 8, 1, 7, 8, 2, 5, 3, 6, 6, 4, 5, 8, 8, NA, 3, 6, 7, 3, 2, 8, 6, 6, 9, 3, 6, 8, …
$ maximum_nights         <dbl> 503, 83, 256, 714, 55, 869, 486, 817, 697, 1101, 328, 300, 984, 312, 1051, 411, 789, 398, 1111, 5…
$ number_of_reviews      <dbl> 10, 7, 9, 8, 16, 9, 10, 8, 8, 7, 8, 7, 7, 10, 11, 12, 8, 3, 12, 9, 9, 10, 15, 16, 14, 10, 11, 15,…
$ review_scores_rating   <dbl> 2, 1, 2, 5, 2, 4, 1, 3, 5, 1, 2, 4, 4, 3, 1, 2, 3, 3, 1, 2, 3, 1, 1, 4, 3, 5, 3, NA, 5, 2, 1, 1, …
$ reviews_per_month      <dbl> 2.8, 4.9, 4.1, 2.6, 3.8, 1.6, 5.0, 3.7, 4.1, 4.9, 3.0, 1.8, 4.7, 3.4, 1.2, 3.7, 3.6, 2.2, 3.3, 4.…
$ availability_30        <dbl> 23, 19, 2, 8, 5, 0, 24, 9, 28, 2, 28, 15, 5, 26, 28, 26, 29, 2, 23, 0, 11, 3, 28, 16, 26, 22, 16,…
$ availability_60        <dbl> 56, 57, 37, 7, 14, 22, 45, 58, 26, 37, 19, 49, 49, 9, 40, 51, 2, 53, 54, 31, 27, 49, 52, 23, 55, …
$ availability_90        <dbl> 41, 87, 65, 34, 57, 47, 83, 77, 78, 12, 21, 77, 8, 71, 23, 50, 9, 89, 0, 23, 70, 44, 28, 26, 5, 8…
$ availability_365       <dbl> 18, 196, 278, 263, 30, 44, 185, 230, 336, 18, 221, 60, 52, 72, 233, 121, 62, 242, 217, 249, 80, 2…
$ instant_bookable       <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0…
$ host_identity_verified <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0…
$ host_since             <dbl> 1652, 1126, 1728, 3264, 902, 2773, 771, 424, 1952, 552, 1318, 2494, 1507, 1014, 2518, 3486, 3567,…
$ cancellation_policy    <chr> "flexible", "flexible", "strict", "strict_14_with_grace_period", "strict_14_with_grace_period", "…
$ latitude               <dbl> 40.72955, 40.62440, 40.59282, 40.71197, 40.85087, 40.85365, 40.88695, 40.62214, 40.87585, 40.5826…
$ longitude              <dbl> -73.93356, -73.88089, -73.74172, -73.91624, -73.72905, -73.86373, -73.76245, -73.84639, -73.86990…
$ guests_included        <dbl> 4, 4, 4, 1, 4, 1, 2, 4, 4, 2, 1, 4, 2, 2, 4, 1, 4, 4, 1, 1, 1, 4, 3, 4, 3, 3, 1, 3, 2, 2, 3, 1, 1…
$ extra_people           <dbl> 3, 41, 5, 6, 6, 31, 19, 34, 29, 9, 47, 11, 0, 21, 7, 29, 42, 17, 24, 14, 10, 3, 0, 4, 3, 49, 6, 3…
$ price                  <dbl> 327, 335, 213, 399, 301, 319, 272, 344, 307, 341, 397, 264, 321, 408, 504, 285, 346, 293, 322, 28…
# Percentage of missing values in every column, reshaped to one row per
# column so it can be plotted directly
missing_data <- main_uncleaned_data %>%
  summarise(across(everything(), ~ mean(is.na(.)) * 100)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "Variable",
    values_to = "Missing_Percentage"
  )

# Horizontal bar chart of the missing percentages.
# geom_col() is the direct form of geom_bar(stat = "identity"). The 45-degree
# rotation of axis.text.x was dropped: after coord_flip() that theme element
# styles the horizontal axis, which carries the short percentage ticks, not
# the variable names.
ggplot(
  missing_data,
  aes(x = reorder(Variable, -Missing_Percentage), y = Missing_Percentage)
) +
  geom_col(fill = "#Ffb400") +
  labs(
    title = "Percentage of Missing Data by Column",
    x = "Variable",
    y = "Missing Percentage (%)"
  ) +
  theme_minimal() +
  coord_flip()

NA
# Linear (column-major) positions of every NA cell in the raw data
main_uncleaned_data %>%
  is.na() %>%
  which()
  [1]   388   420   540   844   967  1123  1399  1874  1894  2006  2132  2221  2356  2657  2990  3121  3247  3746  3920  3945
 [21]  4052  4121  4598  4669  4956  5227  5268  5587  5935  5950  6122  6232  6483  6546  6617  7090  7245  7375  7416  7824
 [41]  8387  8642  8662  9032  9063  9157  9158  9201  9471  9562 10128 10181 10347 10961 11001 11193 11278 11331 11575 12046
 [61] 12321 12410 12567 12727 12839 13294 13502 13607 13921 14007 14331 14338 14441 14609 15039 15219 15595 15977 16001 16095
 [81] 16291 16495 16640 17123 17126 17297 17333 17455 18134 18142 18242 18473 19057 19060 19129 19456 19600 19630 19840 20155
[101] 20564 20658 20785 21059 21170 21478 21515 21551 21746 21862 22474 22599 22819 23092 23109 23548 23934 24002 24087 24147
[121] 24231 24375 24628 24656 24692 25018
# Number of missing values in each column
col_missing <- main_uncleaned_data %>%
  is.na() %>%
  colSums()

col_missing
         property_type              room_type           accommodates              bathrooms               bedrooms 
                     5                      5                      5                      5                      5 
                  beds        amenities_count         minimum_nights         maximum_nights      number_of_reviews 
                     5                      5                      5                      5                      5 
  review_scores_rating      reviews_per_month        availability_30        availability_60        availability_90 
                     5                      5                      5                      5                      5 
      availability_365       instant_bookable host_identity_verified             host_since    cancellation_policy 
                     5                      5                      5                      5                      5 
              latitude              longitude        guests_included           extra_people                  price 
                     5                      5                      5                      6                      5 
# Keep only the rows that have no missing value in any column
dropped_na <- drop_na(main_uncleaned_data)

# Sanity check: an empty result means no NA cells remain
which(is.na(dropped_na))
integer(0)
# Quartiles and interquartile range of the bedroom counts
Q1 <- quantile(dropped_na[["bedrooms"]], probs = 0.25, na.rm = TRUE)
Q3 <- quantile(dropped_na[["bedrooms"]], probs = 0.75, na.rm = TRUE)
IQR <- Q3 - Q1

# Keep rows whose bedroom count lies within 1.5 * IQR of the quartiles;
# comma-separated filter() conditions are combined with a logical AND
cleaned_airbnb_data <- filter(
  dropped_na,
  bedrooms >= (Q1 - 1.5 * IQR),
  bedrooms <= (Q3 + 1.5 * IQR)
)

glimpse(cleaned_airbnb_data)
Rows: 886
Columns: 25
$ property_type          <chr> "Apartment", "Townhouse", "Apartment", "Apartment", "House", "Loft", "House", "House", "Apartment…
$ room_type              <chr> "Entire home/apt", "Entire home/apt", "Entire home/apt", "Entire home/apt", "Entire home/apt", "P…
$ accommodates           <dbl> 6, 2, 2, 4, 8, 5, 3, 2, 5, 3, 5, 9, 4, 4, 4, 1, 1, 2, 8, 8, 2, 5, 3, 3, 4, 3, 3, 2, 5, 2, 7, 4, 7…
$ bathrooms              <dbl> 1, 4, 1, 4, 1, 3, 1, 3, 4, 2, 4, 1, 4, 4, 2, 2, 2, 1, 3, 3, 4, 1, 1, 4, 4, 1, 1, 2, 1, 2, 4, 4, 2…
$ bedrooms               <dbl> 3, 1, 4, 2, 2, 2, 1, 5, 2, 4, 3, 3, 1, 5, 4, 4, 5, 5, 5, 3, 3, 5, 4, 1, 2, 2, 2, 2, 3, 3, 1, 5, 2…
$ beds                   <dbl> 2, 1, 4, 1, 1, 1, 1, 5, 3, 5, 4, 3, 1, 6, 5, 5, 5, 6, 4, 3, 2, 6, 5, 1, 1, 2, 2, 2, 4, 2, 1, 5, 2…
$ amenities_count        <dbl> 41, 31, 8, 49, 23, 40, 20, 14, 25, 49, 48, 39, 37, 35, 19, 27, 11, 16, 28, 48, 8, 30, 22, 24, 39,…
$ minimum_nights         <dbl> 2, 2, 1, 9, 5, 3, 8, 1, 7, 8, 2, 3, 6, 6, 4, 5, 8, 8, 3, 6, 7, 3, 2, 8, 6, 9, 3, 6, 8, 8, 7, 8, 8…
$ maximum_nights         <dbl> 503, 83, 256, 714, 55, 869, 486, 817, 697, 1101, 328, 984, 312, 1051, 411, 789, 398, 1111, 184, 3…
$ number_of_reviews      <dbl> 10, 7, 9, 8, 16, 9, 10, 8, 8, 7, 8, 7, 10, 11, 12, 8, 3, 12, 9, 10, 15, 16, 14, 10, 11, 8, 9, 13,…
$ review_scores_rating   <dbl> 2, 1, 2, 5, 2, 4, 1, 3, 5, 1, 2, 4, 3, 1, 2, 3, 3, 1, 3, 1, 1, 4, 3, 5, 3, 5, 2, 1, 1, 2, 4, 4, 1…
$ reviews_per_month      <dbl> 2.8, 4.9, 4.1, 2.6, 3.8, 1.6, 5.0, 3.7, 4.1, 4.9, 3.0, 4.7, 3.4, 1.2, 3.7, 3.6, 2.2, 3.3, 0.6, 2.…
$ availability_30        <dbl> 23, 19, 2, 8, 5, 0, 24, 9, 28, 2, 28, 5, 26, 28, 26, 29, 2, 23, 11, 3, 28, 16, 26, 22, 16, 18, 25…
$ availability_60        <dbl> 56, 57, 37, 7, 14, 22, 45, 58, 26, 37, 19, 49, 9, 40, 51, 2, 53, 54, 27, 49, 52, 23, 55, 4, 9, 47…
$ availability_90        <dbl> 41, 87, 65, 34, 57, 47, 83, 77, 78, 12, 21, 8, 71, 23, 50, 9, 89, 0, 70, 44, 28, 26, 5, 86, 82, 2…
$ availability_365       <dbl> 18, 196, 278, 263, 30, 44, 185, 230, 336, 18, 221, 52, 72, 233, 121, 62, 242, 217, 80, 201, 352, …
$ instant_bookable       <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0…
$ host_identity_verified <dbl> 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1…
$ host_since             <dbl> 1652, 1126, 1728, 3264, 902, 2773, 771, 424, 1952, 552, 1318, 1507, 1014, 2518, 3486, 3567, 1618,…
$ cancellation_policy    <chr> "flexible", "flexible", "strict", "strict_14_with_grace_period", "strict_14_with_grace_period", "…
$ latitude               <dbl> 40.72955, 40.62440, 40.59282, 40.71197, 40.85087, 40.85365, 40.88695, 40.62214, 40.87585, 40.5826…
$ longitude              <dbl> -73.93356, -73.88089, -73.74172, -73.91624, -73.72905, -73.86373, -73.76245, -73.84639, -73.86990…
$ guests_included        <dbl> 4, 4, 4, 1, 4, 1, 2, 4, 4, 2, 1, 2, 2, 4, 1, 4, 4, 1, 1, 4, 3, 4, 3, 3, 1, 2, 2, 3, 1, 1, 1, 4, 3…
$ extra_people           <dbl> 3, 41, 5, 6, 6, 31, 19, 34, 29, 9, 47, 0, 21, 7, 29, 42, 17, 24, 10, 3, 0, 4, 3, 49, 6, 21, 23, 4…
$ price                  <dbl> 327, 335, 213, 399, 301, 319, 272, 344, 307, 341, 397, 321, 408, 504, 285, 346, 293, 322, 342, 46…
# Numeric columns explored with histograms and boxplots
numeric_vars <- c(
  "bathrooms", "bedrooms", "beds", "amenities_count", "minimum_nights",
  "maximum_nights", "number_of_reviews", "review_scores_rating", "price"
)

# Columns explored with frequency bar plots.
# "host_identity_verified" was misspelled ("host_identity_verifed"), so the
# column lookup found nothing and its bar plot was silently skipped.
categorical_vars <- c(
  "property_type", "room_type", "cancellation_policy",
  "host_identity_verified"
)
# Size the panel grid from the number of variables: the previous fixed 2 x 4
# grid has 8 cells, so the 9th histogram spilled onto a second page
par(mfrow = grDevices::n2mfrow(length(numeric_vars)))

# One histogram per numeric variable, drawn from the NA-free data before any
# outlier filtering. invisible() keeps the list of "histogram" objects that
# lapply() returns from being printed to the console.
invisible(lapply(numeric_vars, function(var) {
  hist(
    dropped_na[[var]],
    main = paste("Histogram of", var),
    xlab = var,
    col = "#FFB400",
    border = "white"
  )
}))

[[1]]
$breaks
 [1] 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8 4.0

$counts
 [1] 211   0   0   0 233   0   0   0   0 202   0   0   0   0 245

$density
 [1] 1.184063 0.000000 0.000000 0.000000 1.307520 0.000000 0.000000 0.000000 0.000000 1.133558 0.000000 0.000000 0.000000 0.000000
[15] 1.374860

$mids
 [1] 1.1 1.3 1.5 1.7 1.9 2.1 2.3 2.5 2.7 2.9 3.1 3.3 3.5 3.7 3.9

$xname
[1] "dropped_na[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[2]]
$breaks
 [1]   0  10  20  30  40  50  60  70  80  90 100

$counts
 [1] 886   0   1   0   2   0   1   0   0   1

$density
 [1] 0.0994388328 0.0000000000 0.0001122334 0.0000000000 0.0002244669 0.0000000000 0.0001122334 0.0000000000 0.0000000000
[10] 0.0001122334

$mids
 [1]  5 15 25 35 45 55 65 75 85 95

$xname
[1] "dropped_na[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[3]]
$breaks
 [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0

$counts
 [1] 183 179   0 163   0 180   0 129   0  57

$density
 [1] 0.4107744 0.4017957 0.0000000 0.3658810 0.0000000 0.4040404 0.0000000 0.2895623 0.0000000 0.1279461

$mids
 [1] 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75 5.25 5.75

$xname
[1] "dropped_na[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[4]]
$breaks
 [1]  5 10 15 20 25 30 35 40 45 50

$counts
[1] 110  98  78 117  85 102 110  97  94

$density
[1] 0.02469136 0.02199776 0.01750842 0.02626263 0.01907969 0.02289562 0.02469136 0.02177329 0.02109989

$mids
[1]  7.5 12.5 17.5 22.5 27.5 32.5 37.5 42.5 47.5

$xname
[1] "dropped_na[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[5]]
$breaks
[1] 1 2 3 4 5 6 7 8 9

$counts
[1] 193 122  86  95  83 109 107  96

$density
[1] 0.21661055 0.13692480 0.09652076 0.10662177 0.09315376 0.12233446 0.12008979 0.10774411

$mids
[1] 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5

$xname
[1] "dropped_na[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[6]]
$breaks
 [1]    0  100  200  300  400  500  600  700  800  900 1000 1100 1200

$counts
 [1] 56 75 85 92 71 68 73 84 93 75 91 28

$density
 [1] 0.0006285073 0.0008417508 0.0009539843 0.0010325477 0.0007968575 0.0007631874 0.0008193042 0.0009427609 0.0010437710
[10] 0.0008417508 0.0010213244 0.0003142536

$mids
 [1]   50  150  250  350  450  550  650  750  850  950 1050 1150

$xname
[1] "dropped_na[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[7]]
$breaks
 [1]  0  2  4  6  8 10 12 14 16 18 20 22 24 26

$counts
 [1]   5  21  90 186 239 180  97  51  13   6   1   1   1

$density
 [1] 0.0028058361 0.0117845118 0.0505050505 0.1043771044 0.1341189675 0.1010101010 0.0544332211 0.0286195286 0.0072951740
[10] 0.0033670034 0.0005611672 0.0005611672 0.0005611672

$mids
 [1]  1  3  5  7  9 11 13 15 17 19 21 23 25

$xname
[1] "dropped_na[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[8]]
$breaks
[1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0

$counts
[1] 124 236   0 202   0 216   0 113

$density
[1] 0.2783389 0.5297419 0.0000000 0.4534231 0.0000000 0.4848485 0.0000000 0.2536476

$mids
[1] 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75

$xname
[1] "dropped_na[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[9]]
$breaks
 [1] -400 -300 -200 -100    0  100  200  300  400  500  600

$counts
 [1]   1   3   1   0   3  39 311 423 104   6

$density
 [1] 1.122334e-05 3.367003e-05 1.122334e-05 0.000000e+00 3.367003e-05 4.377104e-04 3.490460e-03 4.747475e-03 1.167228e-03
[10] 6.734007e-05

$mids
 [1] -350 -250 -150  -50   50  150  250  350  450  550

$xname
[1] "dropped_na[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

# Size the panel grid from the number of variables: the previous fixed 2 x 4
# grid has 8 cells, so the 9th boxplot spilled onto a second page
par(mfrow = grDevices::n2mfrow(length(numeric_vars)))

# One boxplot per numeric variable, drawn from the NA-free data before any
# outlier filtering. invisible() keeps the list of boxplot statistics that
# lapply() returns from being printed to the console.
invisible(lapply(numeric_vars, function(var) {
  boxplot(
    dropped_na[[var]],
    main = paste("Boxplot of", var),
    ylab = var,
    col = "#00A699"
  )
}))

[[1]]
[[1]]$stats
     [,1]
[1,]    1
[2,]    2
[3,]    3
[4,]    4
[5,]    4

[[1]]$n
[1] 891

[[1]]$conf
         [,1]
[1,] 2.894136
[2,] 3.105864

[[1]]$out
numeric(0)

[[1]]$group
numeric(0)

[[1]]$names
[1] ""


[[2]]
[[2]]$stats
     [,1]
[1,]    1
[2,]    2
[3,]    3
[4,]    4
[5,]    5

[[2]]$n
[1] 891

[[2]]$conf
         [,1]
[1,] 2.894136
[2,] 3.105864

[[2]]$out
[1] 44 92 70 28 45

[[2]]$group
[1] 1 1 1 1 1

[[2]]$names
[1] ""


[[3]]
[[3]]$stats
     [,1]
[1,]    1
[2,]    2
[3,]    3
[4,]    4
[5,]    6

[[3]]$n
[1] 891

[[3]]$conf
         [,1]
[1,] 2.894136
[2,] 3.105864

[[3]]$out
numeric(0)

[[3]]$group
numeric(0)

[[3]]$names
[1] ""


[[4]]
[[4]]$stats
     [,1]
[1,]    5
[2,]   16
[3,]   28
[4,]   39
[5,]   49

[[4]]$n
[1] 891

[[4]]$conf
         [,1]
[1,] 26.78256
[2,] 29.21744

[[4]]$out
numeric(0)

[[4]]$group
numeric(0)

[[4]]$names
[1] ""


[[5]]
[[5]]$stats
     [,1]
[1,]    1
[2,]    3
[3,]    5
[4,]    7
[5,]    9

[[5]]$n
[1] 891

[[5]]$conf
         [,1]
[1,] 4.788272
[2,] 5.211728

[[5]]$out
numeric(0)

[[5]]$group
numeric(0)

[[5]]$names
[1] ""


[[6]]
[[6]]$stats
       [,1]
[1,]   30.0
[2,]  307.0
[3,]  598.0
[4,]  868.5
[5,] 1124.0

[[6]]$n
[1] 891

[[6]]$conf
         [,1]
[1,] 568.2787
[2,] 627.7213

[[6]]$out
numeric(0)

[[6]]$group
numeric(0)

[[6]]$names
[1] ""


[[7]]
[[7]]$stats
     [,1]
[1,]    2
[2,]    8
[3,]   10
[4,]   12
[5,]   18

[[7]]$n
[1] 891

[[7]]$conf
          [,1]
[1,]  9.788272
[2,] 10.211728

[[7]]$out
 [1] 19 25 19  1 24  1 19 19 21 20 20

[[7]]$group
 [1] 1 1 1 1 1 1 1 1 1 1 1

[[7]]$names
[1] ""


[[8]]
[[8]]$stats
     [,1]
[1,]    1
[2,]    2
[3,]    3
[4,]    4
[5,]    5

[[8]]$n
[1] 891

[[8]]$conf
         [,1]
[1,] 2.894136
[2,] 3.105864

[[8]]$out
numeric(0)

[[8]]$group
numeric(0)

[[8]]$names
[1] ""


[[9]]
[[9]]$stats
      [,1]
[1,] 133.0
[2,] 270.5
[3,] 317.0
[4,] 363.0
[5,] 493.0

[[9]]$n
[1] 891

[[9]]$conf
         [,1]
[1,] 312.1038
[2,] 321.8962

[[9]]$out
 [1]  504 -304   99  515   97 -286  544 -156  566 -265   99  503  544 -258

[[9]]$group
 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1

[[9]]$names
[1] ""

There are negative price outliers, so we’ll filter them out.

# Keep only listings with a strictly positive price
# (drops the negative values and any zero)

cleaned_airbnb_data <- filter(cleaned_airbnb_data, price > 0)
# Size the panel grid from the number of variables: the previous fixed 2 x 4
# grid has 8 cells, so the 9th histogram spilled onto a second page
par(mfrow = grDevices::n2mfrow(length(numeric_vars)))

# One histogram per numeric variable, drawn from the cleaned data.
# invisible() keeps the list of "histogram" objects that lapply() returns
# from being printed to the console.
invisible(lapply(numeric_vars, function(var) {
  hist(
    cleaned_airbnb_data[[var]],
    main = paste("Histogram of", var),
    xlab = var,
    col = "#FFB400",
    border = "white"
  )
}))

[[1]]
$breaks
 [1] 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8 4.0

$counts
 [1] 207   0   0   0 229   0   0   0   0 202   0   0   0   0 243

$density
 [1] 1.174801 0.000000 0.000000 0.000000 1.299659 0.000000 0.000000 0.000000 0.000000 1.146425 0.000000 0.000000 0.000000 0.000000
[15] 1.379115

$mids
 [1] 1.1 1.3 1.5 1.7 1.9 2.1 2.3 2.5 2.7 2.9 3.1 3.3 3.5 3.7 3.9

$xname
[1] "cleaned_airbnb_data[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[2]]
$breaks
[1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0

$counts
[1] 182 176   0 173   0 166   0 184

$density
[1] 0.4131669 0.3995460 0.0000000 0.3927355 0.0000000 0.3768445 0.0000000 0.4177072

$mids
[1] 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75

$xname
[1] "cleaned_airbnb_data[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[3]]
$breaks
 [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0

$counts
 [1] 180 178   0 160   0 179   0 127   0  57

$density
 [1] 0.4086266 0.4040863 0.0000000 0.3632236 0.0000000 0.4063564 0.0000000 0.2883087 0.0000000 0.1293984

$mids
 [1] 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75 5.25 5.75

$xname
[1] "cleaned_airbnb_data[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[4]]
$breaks
 [1]  5 10 15 20 25 30 35 40 45 50

$counts
[1] 107  98  78 115  84 102 110  96  91

$density
[1] 0.02429058 0.02224745 0.01770715 0.02610670 0.01906924 0.02315551 0.02497162 0.02179342 0.02065834

$mids
[1]  7.5 12.5 17.5 22.5 27.5 32.5 37.5 42.5 47.5

$xname
[1] "cleaned_airbnb_data[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[5]]
$breaks
[1] 1 2 3 4 5 6 7 8 9

$counts
[1] 191 120  86  94  81 107 107  95

$density
[1] 0.21679909 0.13620885 0.09761635 0.10669694 0.09194098 0.12145289 0.12145289 0.10783201

$mids
[1] 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5

$xname
[1] "cleaned_airbnb_data[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[6]]
$breaks
 [1]    0  100  200  300  400  500  600  700  800  900 1000 1100 1200

$counts
 [1] 54 74 84 92 71 66 73 84 92 73 91 27

$density
 [1] 0.0006129398 0.0008399546 0.0009534620 0.0010442679 0.0008059024 0.0007491487 0.0008286039 0.0009534620 0.0010442679
[10] 0.0008286039 0.0010329171 0.0003064699

$mids
 [1]   50  150  250  350  450  550  650  750  850  950 1050 1150

$xname
[1] "cleaned_airbnb_data[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[7]]
$breaks
 [1]  0  2  4  6  8 10 12 14 16 18 20 22 24 26

$counts
 [1]   5  21  89 183 236 178  96  51  13   6   1   1   1

$density
 [1] 0.0028376844 0.0119182747 0.0505107832 0.1038592509 0.1339387060 0.1010215664 0.0544835414 0.0289443814 0.0073779796
[10] 0.0034052213 0.0005675369 0.0005675369 0.0005675369

$mids
 [1]  1  3  5  7  9 11 13 15 17 19 21 23 25

$xname
[1] "cleaned_airbnb_data[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[8]]
$breaks
[1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0

$counts
[1] 123 232   0 199   0 214   0 113

$density
[1] 0.2792281 0.5266742 0.0000000 0.4517594 0.0000000 0.4858116 0.0000000 0.2565267

$mids
[1] 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75

$xname
[1] "cleaned_airbnb_data[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

[[9]]
$breaks
 [1]  50 100 150 200 250 300 350 400 450 500 550 600

$counts
 [1]   3   3  36 102 206 258 163  76  28   5   1

$density
 [1] 6.810443e-05 6.810443e-05 8.172531e-04 2.315551e-03 4.676504e-03 5.856981e-03 3.700341e-03 1.725312e-03 6.356413e-04
[10] 1.135074e-04 2.270148e-05

$mids
 [1]  75 125 175 225 275 325 375 425 475 525 575

$xname
[1] "cleaned_airbnb_data[[var]]"

$equidist
[1] TRUE

attr(,"class")
[1] "histogram"

# Size the panel grid from the number of variables: the previous fixed 2 x 4
# grid has 8 cells, so the 9th boxplot spilled onto a second page
par(mfrow = grDevices::n2mfrow(length(numeric_vars)))

# One boxplot per numeric variable, drawn from the cleaned data.
# invisible() keeps the list of boxplot statistics that lapply() returns
# from being printed to the console.
invisible(lapply(numeric_vars, function(var) {
  boxplot(
    cleaned_airbnb_data[[var]],
    main = paste("Boxplot of", var),
    ylab = var,
    col = "#00A699"
  )
}))

[[1]]
[[1]]$stats
     [,1]
[1,]    1
[2,]    2
[3,]    3
[4,]    4
[5,]    4

[[1]]$n
[1] 881

[[1]]$conf
         [,1]
[1,] 2.893537
[2,] 3.106463

[[1]]$out
numeric(0)

[[1]]$group
numeric(0)

[[1]]$names
[1] ""


[[2]]
[[2]]$stats
     [,1]
[1,]    1
[2,]    2
[3,]    3
[4,]    4
[5,]    5

[[2]]$n
[1] 881

[[2]]$conf
         [,1]
[1,] 2.893537
[2,] 3.106463

[[2]]$out
numeric(0)

[[2]]$group
numeric(0)

[[2]]$names
[1] ""


[[3]]
[[3]]$stats
     [,1]
[1,]    1
[2,]    2
[3,]    3
[4,]    4
[5,]    6

[[3]]$n
[1] 881

[[3]]$conf
         [,1]
[1,] 2.893537
[2,] 3.106463

[[3]]$out
numeric(0)

[[3]]$group
numeric(0)

[[3]]$names
[1] ""


[[4]]
[[4]]$stats
     [,1]
[1,]    5
[2,]   16
[3,]   28
[4,]   39
[5,]   49

[[4]]$n
[1] 881

[[4]]$conf
         [,1]
[1,] 26.77567
[2,] 29.22433

[[4]]$out
numeric(0)

[[4]]$group
numeric(0)

[[4]]$names
[1] ""


[[5]]
[[5]]$stats
     [,1]
[1,]    1
[2,]    3
[3,]    5
[4,]    7
[5,]    9

[[5]]$n
[1] 881

[[5]]$conf
         [,1]
[1,] 4.787074
[2,] 5.212926

[[5]]$out
numeric(0)

[[5]]$group
numeric(0)

[[5]]$names
[1] ""


[[6]]
[[6]]$stats
     [,1]
[1,]   30
[2,]  308
[3,]  599
[4,]  868
[5,] 1124

[[6]]$n
[1] 881

[[6]]$conf
         [,1]
[1,] 569.1903
[2,] 628.8097

[[6]]$out
numeric(0)

[[6]]$group
numeric(0)

[[6]]$names
[1] ""


[[7]]
[[7]]$stats
     [,1]
[1,]    2
[2,]    8
[3,]   10
[4,]   12
[5,]   18

[[7]]$n
[1] 881

[[7]]$conf
          [,1]
[1,]  9.787074
[2,] 10.212926

[[7]]$out
 [1] 19 25 19  1 24  1 19 19 21 20 20

[[7]]$group
 [1] 1 1 1 1 1 1 1 1 1 1 1

[[7]]$names
[1] ""


[[8]]
[[8]]$stats
     [,1]
[1,]    1
[2,]    2
[3,]    3
[4,]    4
[5,]    5

[[8]]$n
[1] 881

[[8]]$conf
         [,1]
[1,] 2.893537
[2,] 3.106463

[[8]]$out
numeric(0)

[[8]]$group
numeric(0)

[[8]]$names
[1] ""


[[9]]
[[9]]$stats
     [,1]
[1,]  136
[2,]  272
[3,]  317
[4,]  364
[5,]  493

[[9]]$n
[1] 881

[[9]]$conf
         [,1]
[1,] 312.1027
[2,] 321.8973

[[9]]$out
 [1] 504  99 515  97 544 566  99 503 544 133

[[9]]$group
 [1] 1 1 1 1 1 1 1 1 1 1

[[9]]$names
[1] ""

# One panel per categorical variable; the enlarged bottom margin leaves room
# for the rotated (las = 2) category labels
par(mfrow = c(1, length(categorical_vars)), mar = c(6, 5, 3, 2))

# Bar chart of category frequencies for each categorical variable
for (i in seq_along(categorical_vars)) {
  # A misspelled or absent column used to be skipped without any notice;
  # report it so the gap in the panel row is explained
  if (!categorical_vars[i] %in% names(cleaned_airbnb_data)) {
    warning(
      "Column not found, skipping bar plot: ", categorical_vars[i],
      call. = FALSE
    )
    next
  }
  counts <- table(cleaned_airbnb_data[[categorical_vars[i]]])
  # Nothing to draw when the column has no observed categories
  if (length(counts) == 0 || all(counts == 0)) {
    next
  }
  barplot(
    counts,
    main = categorical_vars[i],
    col = "#ff5a5f",
    border = "black",
    ylim = c(0, max(counts) + 10),
    las = 2
  )
}

library(corrplot)
corrplot 0.95 loaded
# Drop the coordinates and the three character columns so that only numeric
# columns are passed to cor()
selected_df_for_modelling <- cleaned_airbnb_data %>%
  select(-c(latitude, longitude, cancellation_policy, property_type, room_type))

# Pairwise correlations between all remaining columns
correlation_matrix <- selected_df_for_modelling %>% cor()

# Correlation heatmap
corrplot(
  correlation_matrix,
  method = "circle",    # circles represent each correlation
  type = "full",        # draw the whole matrix (both triangles)
  tl.col = "darkblue",  # label text colour
  tl.srt = 45,          # label rotation in degrees
  tl.cex = 0.8,         # label text size
  cl.cex = 0.8,         # colour-legend text size
  # NOTE(review): pch.cex sizes significance glyphs, which need p.mat; it does
  # not size coefficient numbers -- confirm whether number.cex was intended
  pch.cex = 1.2
)

# Columns plotted against price in the scatter plots
price_col_vars <- c(
  "accommodates", "bathrooms", "bedrooms", "beds", "amenities_count"
)

# Report the class of each of the price_col_vars columns
sapply(cleaned_airbnb_data[price_col_vars], class)
   accommodates       bathrooms        bedrooms            beds amenities_count 
      "numeric"       "numeric"       "numeric"       "numeric"       "numeric" 
# Coerce the selected columns to numeric (leaves already-double columns as is)
cleaned_airbnb_data <- cleaned_airbnb_data %>%
  mutate(across(all_of(price_col_vars), as.numeric))

# Base-graphics scatter plot of price against each selected column
for (var in price_col_vars) {
  plot(
    x = as.numeric(cleaned_airbnb_data[[var]]),
    y = as.numeric(cleaned_airbnb_data[["price"]]),
    main = paste("Relationship between", var, "and Price"),
    xlab = var,
    ylab = "Price",
    col = "#00A699",
    pch = 19
  )
}

library(ggplot2)

# Price against amenities count, with a fitted linear trend line
cleaned_airbnb_data %>%
  ggplot(aes(x = amenities_count, y = price)) +
  geom_point(color = "#00A699", alpha = 0.6) +
  geom_smooth(method = "lm", color = "#FF5A5F") +
  theme_minimal() +
  labs(
    title = "Price vs Number of Amenities",
    x = "Number of Amenities",
    y = "Price"
  )

# Price against number of reviews, with a fitted linear trend line
cleaned_airbnb_data %>%
  ggplot(aes(x = number_of_reviews, y = price)) +
  geom_point(color = "#00A699", alpha = 0.6) +
  geom_smooth(method = "lm", color = "#FF5A5F") +
  theme_minimal() +
  labs(
    title = "Price vs Number of Reviews",
    x = "Number of Reviews",
    y = "Price"
  )

# Area chart of beds (y) against bedrooms (x)
cleaned_airbnb_data %>%
  ggplot(aes(x = bedrooms, y = beds)) +
  geom_area(color = "#ffb400", fill = "#ffb400") +
  theme_minimal() +
  labs(
    title = "Area Chart for Beds and Bedrooms",
    x = "Bedrooms",
    y = "Beds"
  )

# Overlaid, semi-transparent price densities, one per room type
cleaned_airbnb_data %>%
  ggplot(aes(x = price, fill = room_type)) +
  geom_density(alpha = 0.5) +
  theme_minimal() +
  labs(title = "Price Density by Room Type", x = "Price", y = "Density")

# US state outlines used as the base layer of the listings map
world_map <- map_data("state")

# Listing locations: point size shows the review score, colour shows the price
ggplot() +
  geom_polygon(
    data = world_map,
    aes(x = long, y = lat, group = group),
    fill = "white",
    color = "grey70" # outline so the white polygons show their borders
  ) +
  geom_point(
    data = cleaned_airbnb_data,
    aes(x = longitude, y = latitude, size = review_scores_rating, color = price)
  ) +
  # Zoom to the extent of the listings: they span only a fraction of a degree,
  # so on the full US canvas every point collapsed into a single dot.
  # Coord-level limits zoom the view without dropping any data.
  coord_quickmap(
    xlim = range(cleaned_airbnb_data$longitude),
    ylim = range(cleaned_airbnb_data$latitude)
  ) +
  labs(
    title = "Listing Locations",
    x = "Longitude",
    y = "Latitude",
    size = "Review Score Rating",
    color = "Price"
  )

# Share of each review score within every room type.
# position = "fill" rescales each bar to 1, so the y axis is a proportion
# (it was labelled "Count"); the title no longer mentions property type,
# which this chart does not use.
ggplot(cleaned_airbnb_data, aes(x = room_type, fill = factor(review_scores_rating))) +
  geom_bar(position = "fill") +
  labs(
    title = "Distribution of Review Score Rating by Room Type",
    x = "Room Type",
    y = "Proportion",
    fill = "Review Score Rating"
  ) +
  scale_fill_manual(values = airbnb_palette) +
  theme_minimal()

# Share of each review score within every cancellation policy.
# position = "fill" rescales each bar to 1, so the y axis is a proportion
# (it was labelled "Count").
ggplot(cleaned_airbnb_data, aes(x = cancellation_policy, fill = factor(review_scores_rating))) +
  geom_bar(position = "fill") +
  labs(
    title = "Distribution of Review Score Rating by Cancellation Policy",
    x = "Cancellation Policy",
    y = "Proportion",
    fill = "Review Score Rating"
  ) +
  scale_fill_manual(values = airbnb_palette) +
  theme_minimal()

# Price against amenities count, coloured by room type, with one smoothed
# trend line per room type (the colour mapping is inherited from ggplot())
room_type_colours <- c(
  "Entire home/apt" = "#FF5A5F",
  "Private room" = "#00A699",
  "Shared room" = "#ffb400"
)

cleaned_airbnb_data %>%
  ggplot(aes(x = amenities_count, y = price, color = room_type)) +
  geom_point(size = 2) +
  geom_smooth(se = FALSE) +
  scale_color_manual(values = room_type_colours) +
  labs(
    title = "Price vs. Amenities Count by Room Type",
    x = "Amenities Count",
    y = "Price",
    color = "Room Type"
  ) +
  theme_minimal()

# Price distribution for each property type, with slanted x-axis labels
cleaned_airbnb_data %>%
  ggplot(aes(x = property_type, y = price)) +
  geom_boxplot(fill = "#ffb400", color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Price by Property Type", x = "Property Type", y = "Price")


# Box plots of listing price, one box per room type.
# The title and x-axis label name the variable actually on the x axis
# (room_type), not the property type.
ggplot(cleaned_airbnb_data, aes(x = room_type, y = price)) +
  geom_boxplot(fill = "#ffb400", color = "black") +
  labs(title = "Price by Room Type", x = "Room Type", y = "Price") +
  theme_minimal() +
  # Tilt the x-axis labels by 45 degrees and right-align them.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Box plots of review score rating, one box per host verification status.
# host_identity_verified is wrapped in factor() so it is treated as a
# discrete grouping variable on the x axis.
ggplot(
  cleaned_airbnb_data,
  aes(x = factor(host_identity_verified), y = review_scores_rating)
) +
  geom_boxplot(fill = "#ff5a5f", color = "black") +
  labs(
    title = "Rating by Host Verification",
    x = "Host Verification Status",
    y = "Rating"
  ) +
  theme_minimal() +
  # Tilt the x-axis labels by 45 degrees and right-align them.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

LS0tCnRpdGxlOiBBaXJibmIgRURBIChjbGVhbmluZywgZnJlcXVlbmN5IGRpc3RyaWJ1dGlvbiwgYm94IHBsb3RzLCBjb2xsZXJhdGlvbiBtYXRyaXgsCiAgc2NhdHRlcnBsb3RzKQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmBgYAoKCmBgYHtyfQphaXJibmJfcGFsZXR0ZSA8LSBjKCIjRkY1QTVGIiwgIiMwMEE2OTkiLCAiIzc2NzY3NiIsICIjNDg0ODQ4IiwgIiNGRkI0MDAiKQpgYGAKCgpgYGB7cn0KbWFpbl91bmNsZWFuZWRfZGF0YSA8LSByZWFkX2NzdigiYWlyYm5iMS5jc3YiKQoKaGVhZChtYWluX3VuY2xlYW5lZF9kYXRhKQoKc3VtbWFyeShtYWluX3VuY2xlYW5lZF9kYXRhKQoKZ2xpbXBzZShtYWluX3VuY2xlYW5lZF9kYXRhKQpgYGAKCmBgYHtyfQojIENhbGN1bGF0ZSB0aGUgcGVyY2VudGFnZSBvZiBtaXNzaW5nIGRhdGEgZm9yIGVhY2ggY29sdW1uCm1pc3NpbmdfZGF0YSA8LSBtYWluX3VuY2xlYW5lZF9kYXRhICU+JQogIHN1bW1hcmlzZShhY3Jvc3MoZXZlcnl0aGluZygpLCB+IG1lYW4oaXMubmEoLikpICogMTAwKSkgJT4lCiAgcGl2b3RfbG9uZ2VyKGNvbHMgPSBldmVyeXRoaW5nKCksIG5hbWVzX3RvID0gIlZhcmlhYmxlIiwgdmFsdWVzX3RvID0gIk1pc3NpbmdfUGVyY2VudGFnZSIpCgojIFBsb3QgdGhlIG1pc3NpbmcgZGF0YSBwZXJjZW50YWdlCmdncGxvdChtaXNzaW5nX2RhdGEsIGFlcyh4ID0gcmVvcmRlcihWYXJpYWJsZSwgLU1pc3NpbmdfUGVyY2VudGFnZSksIHkgPSBNaXNzaW5nX1BlcmNlbnRhZ2UpKSArCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIGZpbGwgPSAiI0ZmYjQwMCIpICsgCiAgbGFicyh0aXRsZSA9ICJQZXJjZW50YWdlIG9mIE1pc3NpbmcgRGF0YSBieSBDb2x1bW4iLCB4ID0gIlZhcmlhYmxlIiwgeSA9ICJNaXNzaW5nIFBlcmNlbnRhZ2UgKCUpIikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSkpICsKICBjb29yZF9mbGlwKCkKICAKYGBgCgoKCmBgYHtyfQp3aGljaChpcy5uYShtYWluX3VuY2xlYW5lZF9kYXRhKSkKCmNvbF9taXNzaW5nIDwtIGNvbFN1bXMoaXMubmEobWFpbl91bmNsZWFuZWRfZGF0YSkpCgpjb2xfbWlzc2luZwoKZHJvcHBlZF9uYSA8LSBtYWluX3VuY2xlYW5lZF9kYXRhICU+JSBkcm9wX25hKCkKCndoaWNoKGlzLm5hKGRyb3BwZWRfbmEpKSAjIGp1c3QgY2hlY2tpbmcgaWYgdGhlIGNsZWFuaW5nIHdvcmtlZCBvciBub3QKClExIDwtIHF1YW50aWxlKGRyb3BwZWRfbmEkYmVkcm9vbXMsIDAuMjUsIG5hLnJtID0gVFJVRSkKUTMgPC0gcXVhbnRpbGUoZHJvcHBlZF9uYSRiZWRyb29tcywgMC43NSwgbmEucm0gPSBUUlVFKQpJUVIgPC0gUTMgLSBRMQoKY2xlYW5lZF9haXJibmJfZGF0YSA8LSBkcm9wcGVkX25hICU+JQogIGZpbHRlcihiZWRyb29tcyA+PSAoUTEgLSAxLjUgKiBJUVIpICYgYmVkcm9vbXMgPD0gKFEzICsgMS41ICogSVFS
KSkKCmdsaW1wc2UoY2xlYW5lZF9haXJibmJfZGF0YSkKCmBgYAoKYGBge3J9Cm51bWVyaWNfdmFycyA8LSBjKCJiYXRocm9vbXMiLCAiYmVkcm9vbXMiLCAiYmVkcyIsICJhbWVuaXRpZXNfY291bnQiLCAibWluaW11bV9uaWdodHMiLCAibWF4aW11bV9uaWdodHMiLCAibnVtYmVyX29mX3Jldmlld3MiLCAicmV2aWV3X3Njb3Jlc19yYXRpbmciLCAicHJpY2UiKQoKY2F0ZWdvcmljYWxfdmFycyA8LSBjKCJwcm9wZXJ0eV90eXBlIiwgInJvb21fdHlwZSIsICJjYW5jZWxsYXRpb25fcG9saWN5IiwgImhvc3RfaWRlbnRpdHlfdmVyaWZlZCIpCmBgYAoKCmBgYHtyfQpwYXIobWZyb3cgPSBjKDIsIDQpKSAjIFNldCB1cCBwbG90dGluZyBsYXlvdXQKCiMgQ3JlYXRlIGEgaGlzdG9ncmFtIGZvciBlYWNoIHZhcmlhYmxlCmxhcHBseShudW1lcmljX3ZhcnMsIGZ1bmN0aW9uKHZhcikgewogIGhpc3QoZHJvcHBlZF9uYVtbdmFyXV0sIAogICAgICAgbWFpbiA9IHBhc3RlKCJIaXN0b2dyYW0gb2YiLCB2YXIpLCAKICAgICAgIHhsYWIgPSB2YXIsIAogICAgICAgY29sID0gIiNGRkI0MDAiLCAKICAgICAgIGJvcmRlciA9ICJ3aGl0ZSIKICAgICAgICkKfSkKYGBgCgpgYGB7cn0KcGFyKG1mcm93ID0gYygyLCA0KSkgIyAyIHJvd3MsIDQgY29sdW1ucwoKIyBDcmVhdGUgYSBib3hwbG90IGZvciBlYWNoIHZhcmlhYmxlCmxhcHBseShudW1lcmljX3ZhcnMsIGZ1bmN0aW9uKHZhcikgewogIGJveHBsb3QoZHJvcHBlZF9uYVtbdmFyXV0sIAogICAgICAgICAgbWFpbiA9IHBhc3RlKCJCb3hwbG90IG9mIiwgdmFyKSwgCiAgICAgICAgICB5bGFiID0gdmFyLCAKICAgICAgICAgIGNvbCA9ICIjMDBBNjk5IikKfSkKYGBgCgpUaGVyZSBhcmUgbmVnYXRpdmUgcHJpY2Ugb3V0bGllcnMsIHNvIHdlJ2xsIGZpbHRlciB0aGF0IG91dApgYGB7cn0KIyByZW1vdmluZyBuZWdhdGl2ZSBwcmljZSB2YWx1ZXMKCmNsZWFuZWRfYWlyYm5iX2RhdGEgPC0gY2xlYW5lZF9haXJibmJfZGF0YSAlPiUKICBmaWx0ZXIocHJpY2UgPiAwKQpgYGAKCgpgYGB7cn0KcGFyKG1mcm93ID0gYygyLCA0KSkgIyBTZXQgdXAgcGxvdHRpbmcgbGF5b3V0CgojIENyZWF0ZSBhIGhpc3RvZ3JhbSBmb3IgZWFjaCB2YXJpYWJsZQpsYXBwbHkobnVtZXJpY192YXJzLCBmdW5jdGlvbih2YXIpIHsKICBoaXN0KGNsZWFuZWRfYWlyYm5iX2RhdGFbW3Zhcl1dLCAKICAgICAgIG1haW4gPSBwYXN0ZSgiSGlzdG9ncmFtIG9mIiwgdmFyKSwgCiAgICAgICB4bGFiID0gdmFyLCAKICAgICAgIGNvbCA9ICIjRkZCNDAwIiwgCiAgICAgICBib3JkZXIgPSAid2hpdGUiCiAgICAgICApCn0pCmBgYAoKCgpgYGB7cn0KcGFyKG1mcm93ID0gYygyLCA0KSkgIyAyIHJvd3MsIDQgY29sdW1ucwoKIyBDcmVhdGUgYSBib3hwbG90IGZvciBlYWNoIHZhcmlhYmxlCmxhcHBseShudW1lcmljX3ZhcnMsIGZ1bmN0aW9uKHZhcikgewogIGJveHBsb3QoY2xlYW5lZF9haXJibmJfZGF0YVtbdmFyXV0sIAogICAgICAgICAgbWFp
biA9IHBhc3RlKCJCb3hwbG90IG9mIiwgdmFyKSwgCiAgICAgICAgICB5bGFiID0gdmFyLCAKICAgICAgICAgIGNvbCA9ICIjMDBBNjk5IikKfSkKYGBgCgpgYGB7cn0KIyBTZXQgdXAgbWFyZ2lucyB0byBhY2NvbW1vZGF0ZSByb3RhdGVkIGxhYmVscwpwYXIobWZyb3c9YygxLCBsZW5ndGgoY2F0ZWdvcmljYWxfdmFycykpLCBtYXI9Yyg2LCA1LCAzLCAyKSkKCiMgQ3JlYXRlIHRoZSBiYXIgcGxvdHMgd2l0aCByb3RhdGVkIGxhYmVscwpmb3IgKGkgaW4gMTpsZW5ndGgoY2F0ZWdvcmljYWxfdmFycykpIHsKICBjb3VudHMgPC0gdGFibGUoY2xlYW5lZF9haXJibmJfZGF0YVtbY2F0ZWdvcmljYWxfdmFyc1tpXV1dKQogIGlmIChsZW5ndGgoY291bnRzKSA9PSAwIHx8IGFsbChjb3VudHMgPT0gMCkpIHsKICAgIG5leHQKICB9CiAgYmFycGxvdChjb3VudHMsIG1haW49Y2F0ZWdvcmljYWxfdmFyc1tpXSwgY29sPScjZmY1YTVmJywgYm9yZGVyPSdibGFjaycsIHlsaW09YygwLCBtYXgoY291bnRzKSArIDEwKSwgbGFzID0gMikKfQpgYGAKCgpgYGB7cn0KbGlicmFyeShjb3JycGxvdCkKCnNlbGVjdGVkX2RmX2Zvcl9tb2RlbGxpbmcgPC0gc2VsZWN0KGNsZWFuZWRfYWlyYm5iX2RhdGEsIC1sYXRpdHVkZSwgLWxvbmdpdHVkZSwgLWNhbmNlbGxhdGlvbl9wb2xpY3ksIC1wcm9wZXJ0eV90eXBlLCAtcm9vbV90eXBlKQoKY29ycmVsYXRpb25fbWF0cml4IDwtIGNvcihzZWxlY3RlZF9kZl9mb3JfbW9kZWxsaW5nKQoKIyBEcmF3IGNvcnJlbGF0aW9uIGhlYXRtYXAKY29ycnBsb3QoY29ycmVsYXRpb25fbWF0cml4LCAKICAgICAgICAgbWV0aG9kID0gImNpcmNsZSIsICAgICAgICAgICAgICAgICMgVXNlIGNpcmNsZXMgdG8gcmVwcmVzZW50IGNvcnJlbGF0aW9ucwogICAgICAgICB0eXBlID0gImZ1bGwiLCAgICAgICAgICAgICAgICAgICAjIERpc3BsYXkgb25seSB1cHBlciB0cmlhbmdsZQogICAgICAgICB0bC5jb2wgPSAiZGFya2JsdWUiLCAgICAgICAgICAgICAgIyBTZXQgZm9udCBjb2xvciB0byBkYXJrIGJsdWUKICAgICAgICAgdGwuc3J0ID0gNDUsICAgICAgICAgICAgICAgICAgICAgICMgUm90YXRlIGxhYmVscyBmb3IgYmV0dGVyIHJlYWRhYmlsaXR5CiAgICAgICAgIHRsLmNleCA9IDAuOCwgICAgICAgICAgICAgICAgICAgICAjIEFkanVzdCBmb250IHNpemUgb2YgbGFiZWxzCiAgICAgICAgIGNsLmNleCA9IDAuOCwgICAgICAgICAgICAgICAgICAgICAjIEFkanVzdCBmb250IHNpemUgb2YgY29sb3IgbGVnZW5kCiAgICAgICAgIHBjaC5jZXggPSAxLjIpICAgICAgICAgICAgICAgICAjIFNldCBzaXplIGZvciB0aGUgY29ycmVsYXRpb24gY29lZmZpY2llbnQgbnVtYmVycwpgYGAKCmBgYHtyfQpwcmljZV9jb2xfdmFycyA8LSBjKCJhY2NvbW1vZGF0ZXMiLCAiYmF0aHJvb21zIiwgImJlZHJvb21zIiwgImJlZHMiLCAiYW1lbml0aWVzX2NvdW50IikKCiMgQ2hlY2sgdGhlIGNsYXNzIG9mIGVhY2ggdmFyaWFibGUgaW4gbnVtZXJpY192
YXJzCnNhcHBseShjbGVhbmVkX2FpcmJuYl9kYXRhIFssIHByaWNlX2NvbF92YXJzXSwgY2xhc3MpCmNsZWFuZWRfYWlyYm5iX2RhdGFbLCBwcmljZV9jb2xfdmFyc10gPC0gbGFwcGx5KGNsZWFuZWRfYWlyYm5iX2RhdGFbLCBwcmljZV9jb2xfdmFyc10sIGFzLm51bWVyaWMpCgpmb3IgKHZhciBpbiBwcmljZV9jb2xfdmFycykgewogIHBsb3QoCiAgICBhcy5udW1lcmljKGNsZWFuZWRfYWlyYm5iX2RhdGFbW3Zhcl1dKSwgCiAgICBhcy5udW1lcmljKGNsZWFuZWRfYWlyYm5iX2RhdGEkcHJpY2UpLCAKICAgIG1haW4gPSBwYXN0ZSgiUmVsYXRpb25zaGlwIGJldHdlZW4iLCB2YXIsICJhbmQgUHJpY2UiKSwgCiAgICB4bGFiID0gdmFyLCAKICAgIHlsYWIgPSAiUHJpY2UiLCAKICAgIGNvbCA9ICIjMDBBNjk5IiwgCiAgICBwY2ggPSAxOQogICkKfQpgYGAKCmBgYHtyfQpsaWJyYXJ5KGdncGxvdDIpCgojIFNjYXR0ZXIgcGxvdCB3aXRoIHJlZ3Jlc3Npb24gbGluZQpnZ3Bsb3QoY2xlYW5lZF9haXJibmJfZGF0YSwgYWVzKHggPSBhbWVuaXRpZXNfY291bnQgLCB5ID0gcHJpY2UpKSArCiAgZ2VvbV9wb2ludChjb2xvciA9ICIjMDBBNjk5IiwgYWxwaGEgPSAwLjYpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBjb2xvciA9ICIjRkY1QTVGIikgKwogIGxhYnModGl0bGUgPSAiUHJpY2UgdnMgTnVtYmVyIG9mIEFtZW5pdGllcyIsIHggPSAiTnVtYmVyIG9mIEFtZW5pdGllcyIsIHkgPSAiUHJpY2UiKSArCiAgdGhlbWVfbWluaW1hbCgpCgpgYGAKCmBgYHtyfQojIFNjYXR0ZXIgcGxvdCB3aXRoIHJlZ3Jlc3Npb24gbGluZQpnZ3Bsb3QoY2xlYW5lZF9haXJibmJfZGF0YSwgYWVzKHggPSBudW1iZXJfb2ZfcmV2aWV3cyAsIHkgPSBwcmljZSkpICsKICBnZW9tX3BvaW50KGNvbG9yID0gIiMwMEE2OTkiLCBhbHBoYSA9IDAuNikgKwogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIsIGNvbG9yID0gIiNGRjVBNUYiKSArCiAgbGFicyh0aXRsZSA9ICJQcmljZSB2cyBOdW1iZXIgb2YgUmV2aWV3cyIsIHggPSAiTnVtYmVyIG9mIFJldmlld3MiLCB5ID0gIlByaWNlIikgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCmBgYHtyfQpnZ3Bsb3QoY2xlYW5lZF9haXJibmJfZGF0YSwgYWVzKHggPSBiZWRyb29tcywgeSA9IGJlZHMpKSArCiAgZ2VvbV9hcmVhKGNvbG9yID0gIiNmZmI0MDAiLCBmaWxsID0gIiNmZmI0MDAiKSArCiAgbGFicyh0aXRsZSA9ICJBcmVhIENoYXJ0IGZvciBCZWRzIGFuZCBCZWRyb29tcyIsCiAgICAgICB4ID0gIkJlZHJvb21zIiwKICAgICAgIHkgPSAiQmVkcyIpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgpgYGB7cn0KIyBEZW5zaXR5IHBsb3Qgb2YgcHJpY2UgYnkgcm9vbSB0eXBlCmdncGxvdChjbGVhbmVkX2FpcmJuYl9kYXRhLCBhZXMoeCA9IHByaWNlLCBmaWxsID0gcm9vbV90eXBlLCkpICsKICBnZW9tX2RlbnNpdHkoYWxwaGEgPSAwLjUpICsKICBsYWJzKHRpdGxlID0gIlByaWNlIERlbnNpdHkgYnkgUm9vbSBU
eXBlIiwgeCA9ICJQcmljZSIsIHkgPSAiRGVuc2l0eSIpICsKICB0aGVtZV9taW5pbWFsKCkKCmBgYAoKYGBge3J9CndvcmxkX21hcCA8LSBtYXBfZGF0YSgic3RhdGUiKQoKZ2dwbG90KCkgKwogIGdlb21fcG9seWdvbihkYXRhID0gd29ybGRfbWFwLCBhZXMoeCA9IGxvbmcsIHkgPSBsYXQsIGdyb3VwID0gZ3JvdXApLCBmaWxsID0gIndoaXRlIikgKyAKICBnZW9tX3BvaW50KGRhdGEgPSBjbGVhbmVkX2FpcmJuYl9kYXRhLCBhZXMoeCA9IGxvbmdpdHVkZSwgeSA9IGxhdGl0dWRlLCBzaXplID0gcmV2aWV3X3Njb3Jlc19yYXRpbmcsIGNvbG9yID0gcHJpY2UpKQpgYGAKCmBgYHtyfQpnZ3Bsb3QoY2xlYW5lZF9haXJibmJfZGF0YSwgYWVzKHggPSByb29tX3R5cGUsIGZpbGwgPSBmYWN0b3IocmV2aWV3X3Njb3Jlc19yYXRpbmcpKSkgKwogIGdlb21fYmFyKHBvc2l0aW9uID0gImZpbGwiKSArCiAgbGFicygKICAgIHRpdGxlID0gIkRpc3RyaWJ1dGlvbiBvZiBSZXZpZXcgU2NvcmUgUmF0aW5nIGJ5IFJvb20gVHlwZSBhbmQgUHJvcGVydHkgVHlwZSIsCiAgICB4ID0gIlJvb20gVHlwZSIsCiAgICB5ID0gIkNvdW50IiwKICAgIGZpbGwgPSAiUmV2aWV3IFNjb3JlIFJhdGluZyIKICApICsKICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXMgPSBhaXJibmJfcGFsZXR0ZSkgKwogIHRoZW1lX21pbmltYWwoKSAKYGBgCgpgYGB7cn0KZ2dwbG90KGNsZWFuZWRfYWlyYm5iX2RhdGEsIGFlcyh4ID0gY2FuY2VsbGF0aW9uX3BvbGljeSAsIGZpbGwgPSBmYWN0b3IocmV2aWV3X3Njb3Jlc19yYXRpbmcpKSkgKwogIGdlb21fYmFyKHBvc2l0aW9uID0gImZpbGwiKSArCiAgbGFicygKICAgIHRpdGxlID0gIkRpc3RyaWJ1dGlvbiBvZiBSZXZpZXcgU2NvcmUgUmF0aW5nIGJ5IENhbmNlbGxhdGlvbiBQb2xpY3kiLAogICAgeCA9ICJDYW5jZWxsYXRpb24gUG9saWN5IiwKICAgIHkgPSAiQ291bnQiLAogICAgZmlsbCA9ICJSZXZpZXcgU2NvcmUgUmF0aW5nIgogICkgKwogIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcyA9IGFpcmJuYl9wYWxldHRlKSArCiAgdGhlbWVfbWluaW1hbCgpIApgYGAKCgpgYGB7cn0KIyBDcmVhdGUgdGhlIHBsb3QKZ2dwbG90KGRhdGEgPSBjbGVhbmVkX2FpcmJuYl9kYXRhLCBtYXBwaW5nID0gYWVzKHggPSBhbWVuaXRpZXNfY291bnQsIHkgPSBwcmljZSwgY29sb3IgPSByb29tX3R5cGUpKSArIAogIGdlb21fcG9pbnQoc2l6ZSA9IDIpICsgCiAgZ2VvbV9zbW9vdGgobWFwcGluZyA9IGFlcyhjb2xvciA9IHJvb21fdHlwZSksIHNlID0gRkFMU0UpICsgCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoIkVudGlyZSBob21lL2FwdCIgPSAiI0ZGNUE1RiIsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJQcml2YXRlIHJvb20iID0gIiMwMEE2OTkiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiU2hhcmVkIHJvb20iID0gIiNmZmI0MDAiKSkgKwogIHRoZW1lX21pbmltYWwoKSArCiAgbGFicyh0
aXRsZSA9ICJQcmljZSB2cy4gQW1lbml0aWVzIENvdW50IGJ5IFJvb20gVHlwZSIsCiAgICAgICB4ID0gIkFtZW5pdGllcyBDb3VudCIsCiAgICAgICB5ID0gIlByaWNlIiwKICAgICAgIGNvbG9yID0gIlJvb20gVHlwZSIpCmBgYAoKYGBge3J9CiMgQm94cGxvdCBmb3IgUHJpY2UgYnkgUHJvcGVydHkgVHlwZQpnZ3Bsb3QoY2xlYW5lZF9haXJibmJfZGF0YSwgYWVzKHggPSBwcm9wZXJ0eV90eXBlLCB5ID0gcHJpY2UpKSArCiAgZ2VvbV9ib3hwbG90KGZpbGwgPSAiI2ZmYjQwMCIsIGNvbG9yID0gImJsYWNrIikgKwogIGxhYnModGl0bGUgPSAiUHJpY2UgYnkgUHJvcGVydHkgVHlwZSIsIHggPSAiUHJvcGVydHkgVHlwZSIsIHkgPSAiUHJpY2UiKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpKQoKIyBCb3hwbG90IGZvciBQcmljZSBieSBSb29tIFR5cGUKZ2dwbG90KGNsZWFuZWRfYWlyYm5iX2RhdGEsIGFlcyh4ID0gcm9vbV90eXBlLCB5ID0gcHJpY2UpKSArCiAgZ2VvbV9ib3hwbG90KGZpbGwgPSAiI2ZmYjQwMCIsIGNvbG9yID0gImJsYWNrIikgKwogIGxhYnModGl0bGUgPSAiUHJpY2UgYnkgUHJvcGVydHkgVHlwZSIsIHggPSAiUHJvcGVydHkgVHlwZSIsIHkgPSAiUHJpY2UiKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpKQoKYGBgCgpgYGB7cn0KIyBCb3hwbG90IGZvciBob3N0IGlkZW50aXR5IGFuZCByZXZpZXcgcmF0aW5nCmdncGxvdChjbGVhbmVkX2FpcmJuYl9kYXRhLCBhZXMoeCA9IGZhY3Rvcihob3N0X2lkZW50aXR5X3ZlcmlmaWVkKSwgeSA9IHJldmlld19zY29yZXNfcmF0aW5nKSkgKwogIGdlb21fYm94cGxvdChmaWxsID0gIiNmZjVhNWYiLCBjb2xvciA9ICJibGFjayIpICsKICBsYWJzKHRpdGxlID0gIlJhdGluZyBieSBIb3N0IFZlaWZpY2F0aW9uIiwgeCA9ICJIb3N0IFZlcmlmaWNhdGlvbiBTdGF0dXMiLCB5ID0gIlJhdGluZyIpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsIGhqdXN0ID0gMSkpCgpgYGAKCmBgYHtyfQojIEJveHBsb3QgZm9yIGhvc3QgaWRlbnRpdHkgYW5kIHJldmlldyByYXRpbmcKZ2dwbG90KGNsZWFuZWRfYWlyYm5iX2RhdGEsIGFlcyh4ID0gZmFjdG9yKGhvc3RfaWRlbnRpdHlfdmVyaWZpZWQpLCB5ID0gcHJpY2UpKSArCiAgZ2VvbV9ib3hwbG90KGZpbGwgPSAiI2ZmNWE1ZiIsIGNvbG9yID0gImJsYWNrIikgKwogIGxhYnModGl0bGUgPSAiUmF0aW5nIGJ5IEhvc3QgVmVpZmljYXRpb24iLCB4ID0gIkhvc3QgVmVyaWZpY2F0aW9uIFN0YXR1cyIsIHkgPSAiUmF0aW5nIikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSkKYGBg
Cg==